* Session setup: global switches, project directories, memory request.
* typeden is set before the include and lf after it; that order is kept.
global typeden = 1
include "set_directories.do"
* NOTE(review): -set memory- is a legacy setting; Stata 12 and later manage
* memory automatically and ignore this request.
set memory 64g
global lf = "LF"



*** Checking and cleaning the aggregate data ****
*------------------------------------------------------------------------
/*
*** These are the programs to create the aggregate datasets ****

include "code/0_Cleaning_WDI.do" /*This file gdp_pc_ppp and gdp_pc_curr (last release 2020) */
include "code/0_Cleaning_WIOD.do" /*2016 release (it has not been updated)*/
include "code/0_Cleaning_KLEMS.do" /*Uses the last release 2019*/ /*Notice it does not have the aggregated sectors */
include "code/0_Cleaning_KLEMS_India_individualfile.do"
include "code/0_Cleaning_GGDC.do"
include "code/0_Cleaning_OECD_STAN_rev4.do"
include "code/0_Cleaning_OECD_STAN_SDBS.do" /*this and "0_Cleaning_OECD_STAN_SDBS_10plus" are old files I need to clean and likely erase*/
include "code/0_Cleaning_OECD_STAN_SDBS_bysize.do"

include "code/0_Cleaning_OECD_EXP.do"  
include "code/0_Cleaning_OECD_MNC_outward.do" **** Notice that this file generates an inward file based on outward information. That is good, BUT this file does not use that product. Instead, it uses the MNC_inward file with the TOTAL (from that inward file) and generates a file with both inward and total (BUT IT DOES NOT INCLUDE THE INWARD FILE MADE OUT OF THE OUTWARD INFORMATION). Nonetheless, all I need is in the file "code/0_Cleaning_OECD_MNC.do", and in that case "code/0_Cleaning_OECD_MNC_outward.do" is redundant.
NO, it is not redundant. The "code/0_Cleaning_OECD_MNC_outward.do" file creates an INWARD file from an outward file ("${tfp_bef}/data/OECD_MNC\AMNE_OUT_PARTNER.csv"). The output file is called "${output}/oecd_mnc_outward_rev4_restricted.dta" *** inward made out of outward information (in the OECD_MNC_outward.do program)
Now, the "code/0_Cleaning_OECD_MNC.do" file generates an INWARD (+total national) AND an OUTWARD MNC file. The outward file is called "${output}/oecd_outward_mnc_rev4_restricted.dta"
Notice that the file names are very similar. The real outward is brought up in the aggregate file.
We should use the outward information to fill the inward with inward made out of outward whenever required ... Notice I will do this in the "check.do" file

include "code/0_Cleaning_OECD_MNC.do"

include "code/0_Cleaning_UNIDO.do" /*Output statistics for develop and developing countries, Manufacturing only*/

include "code/0_Cleaning_tax_mnc.do" /*Output statistics for develop and developing countries, Manufacturing only*/

include "code/0_Cleaning_orbis_historical.do" /*Both for aggregate and firm level statistics*/ /*"${output}/firm_orbis_sales_selected_ctry.dta"*/ /* In contains tables: (new orbis and comparison with original file) */
include "code/firm_tfp_part0_sales.do" /*A very short code now. Ready for regressions*/ /*"${output}/firm_allyears_sales.dta"*/
include "code/aggregate_data_sales_v2.do" /*Last file "${output}/klems_oecd_unido_orbis_sales_emp_exp.dta". MNCs have been incorporated*/
include "code/aggregate_lp_klems.do" /*"${output}/aggregates_tfp_lp_klems.dta"; "${output}/aggregates_tfp_lp_klems_agg.dta"; "${output}/aggregates_tfp_lp_klems_agg_growth.dta" */
include "code/sectoral_weights.do" /*"${output}/sector_weights.dta"*/
*/

*============================================================================
/* NOTES
*(exports) are exports implied by the WIOT (World Input and Output Tables)
*** Where do we need this file, "oecd_mnc_outward_rev4_restricted.do"?
"${data}/oalpha_ctte_FR.dta": is it in a do-file that I called weights? NO (not sure where I used it)
*/


*------------------------------------------------------------------------
*Files used and NOT USED in the process 
*------------------------------------------------------------------------
*"${intermediate}/orbis-mult_short_final_all_links_20200109.dta"  /*Should this be eliminated? What happens here with the consolidated accounts?*/
*"${data}/firm_orbis_sales_selected_ctry.dta" /*ARE WE USING THIS FILE???*/
*"${output}/firm_allyears_sales.dta" /*this file uses a newer download we did inside ORBIS in order to bring in the wage bill. This step will be avoided in the disclosure code*/




*Recall to drop any file that contains firm level information FROM REGRESSIONS
*-----------------------------------------------------------------------



*Orbis CONSTRUCTED AGGREGATES is done in clean_02_orbis_original. Many things are done there (aggregates with obs with information in certain variables, top 20 firms), but we are not using them in this disclosure. 
*-----------------------------------------------------------------------




*Checking treatment of unconsolidated accounts (this is not in the cleaning code, because all accounts are unconsolidated with limited financials)
*------------------------------------------------------------------------
* Keeps one row per (id_bvd, year), preferring consolidation code U1 over U2,
* and renames consolidationcode to con_code. The result is only tabulated;
* nothing is saved in this section.
* NOTE(review): no dataset is loaded earlier in this file, so this section
* presumably expects firm-level data to be in memory already (confirm whether
* set_directories.do loads one). The guard skips the check instead of stopping
* the whole do-file with r(111) when the variables are absent.
capture confirm variable consolidationcode id_bvd year
if _rc {
    display as text "note: consolidationcode/id_bvd/year not in memory; unconsolidated-accounts check skipped"
}
else {
    keep if inlist(consolidationcode, "U1", "U2")
    tab consolidationcode

    * Only U1 and U2 survive the filter above, so the former ranks 3 (C2) and
    * 4 (C1) could never be assigned and are omitted.
    gen byte rank = cond(consolidationcode == "U1", 1, 2)

    * rank is placed in parentheses so that it orders rows within each
    * id_bvd-year group; sorting on id_bvd and year alone does not guarantee
    * that the lowest rank comes first.
    bysort id_bvd year (rank): keep if _n == 1
    drop rank

    rename consolidationcode con_code
    tab con_code
}



*Main aggregated file
*------------------------------------------------------------------------
* Removes source-specific and intermediate variables from the main aggregate
* file, labels the remaining ones, and overwrites the file in place.
use "${data}/klems_oecd_unido_orbis_sales_emp_exp.dta", clear

* Because the file is overwritten below, these variables no longer exist on a
* second run and a plain drop of the whole list would stop with r(111).
* Each name is therefore checked (exact match, no abbreviation) before it is
* dropped, and anything absent is reported rather than silently ignored.
local surplus_vars ///
    WAGE_usd LP_QI_klems LP1TFP_I_klems ///
    GO_oecd GO_usd_oecd GO_usd_klems GO_klems GO_Q_klems GO_Q_usd_klems ///
    VA_oecd VA_usd_oecd VA_Q_oecd VA_Q_usd_oecd ///
    VA_usd_klems VA_klems VA_Q_usd_klems VA_Q_klems ///
    LAB_usd_klems LAB_klems ///
    EMPE_oecd EMP_oecd EMP_klems EMPE_klems ///
    H_EMPE_oecd H_EMP_oecd H_EMP_klems H_EMPE_klems ///
    COMP_oecd COMP_usd_oecd COMP_usd_klems COMP_klems ///
    WAGE_oecd WAGE_usd_oecd LP1_G_klems ratio_exp_wiot ///
    EXP_usd_nt TURN_usd_mnc TURN_usd_nt VA_usd_nt EMPE_nt ENT_nt ///
    EXP_usd_mnc_out ///
    sales_mnc_orbis

foreach v of local surplus_vars {
    capture confirm variable `v', exact
    if _rc == 0 {
        drop `v'
    }
    else {
        display as text "note: `v' not found in the main aggregate file (already dropped?)"
    }
}

label variable COMP_usd "Labor compensation in MM USD"
label variable exports "Exports in MM USD"
label variable GO_usd_nt "Gross Output in MM USD (national total)"
label variable GO_usd_mnc "Gross Output in MM USD at foreign affiliates (inward)"
label variable GO_usd_mnc_out "Gross Output in MM USD of affiliates abroad (outward)"
label variable VA_usd_mnc "Value Added in MM USD at foreign affiliates"
label variable EMPE_mnc "Number of employees (th of workers) at foreign affiliates"
label variable ENT_mnc "Number of foreign affiliates"
label variable sector1 "sector aggregate"
label variable sector  "sector NAICS2"
label variable data_source "Data source for vars: GO_usd, EMPE, VA_usd, VA_Q_usd, LAB_usd"

compress
save, replace



*MNCs aggregated information (inward) [Total output by MNCs (OECD/Eurostat dataset)]
*------------------------------------------------------------------------
* Builds country-level 2016 inward MNC totals (gross output, number of
* affiliates, employees) for the listed countries, then fills or tops them up
* with the *_mnc_out_OECD figures from oecd_mnc_outward_rev4_restricted.dta.
clear all
use year sector1 sector isocode GO_usd_mnc ENT_mnc EMPE_mnc using "${data}/klems_oecd_unido_orbis_sales_emp_exp.dta", clear
keep if year==2016
keep if sector1=="Total (TOT)"

* inlist() accepts at most 10 string arguments, hence three calls of nine codes.
keep if inlist(isocode, "DK", "JP", "IT", "KR", "DE", "FR", "ES", "MX", "GB") ///
      | inlist(isocode, "PL", "GR", "NL", "RO", "BE", "AT", "PT", "FI", "SE") ///
      | inlist(isocode, "CZ", "SK", "BG", "HU", "HR", "LV", "SI", "LT", "EE")

* collapse (sum) returns 0 when every input is missing, which is why 0 is
* treated like missing in the replacement rule below.
collapse (sum) GO_usd_mnc ENT_mnc EMPE_mnc, by( isocode year)

merge 1:1 isocode year using "${data}/oecd_mnc_outward_rev4_restricted.dta", keepusing(*mnc_out_OECD)
drop if _merge==2
drop _merge

* Use the outward-based figure when the inward one is missing or zero, or when
* the outward-based figure is larger. Missing values compare as larger than any
* number, so the "larger" test must exclude a missing outward figure; otherwise
* valid inward totals are overwritten with missing for countries that have no
* outward data.
foreach stub in GO_usd EMPE ENT {
    replace `stub'_mnc = `stub'_mnc_out_OECD ///
        if `stub'_mnc == . | `stub'_mnc == 0 ///
         | (`stub'_mnc_out_OECD > `stub'_mnc & `stub'_mnc_out_OECD < .)
}
save "${data}/oecd_eurostat_agg_mnc_output.dta", replace


*Notice that the ORBIS aggregate file and the program that generates it are INTERNAL files
*------------------------------------------------------------------------
*use "${data}/orbis_mnc_agg.dta", clear
*orbis_mnc_agg.do file

*------------------------------------------------------------------------
* Aggregate-sector TFP / labor-productivity file: keep the variables listed
* below, put them in that order, label, compress and overwrite in place.
use "${data}/aggregates_tfp_lp_klems_agg.dta", clear

* The original keep/order lists repeated several names (copy-paste residue).
* Each name now appears once, in order of first appearance in the old list.
* NOTE(review): confirm this column order is the intended one.
local agg_vars ///
    year isocode type sector1 ///
    ln_gdp_pw_ppp ln_rgdpe ln_LP_ppp_emp_pwt ln_gdp_pc_ppp ///
    ln_rgdpo ln_rgdpna ln_ctfp ln_rtfpna ///
    ln_hc ln_ck ln_pop ln_RuleofLaw

keep `agg_vars'
order `agg_vars'
label variable ln_RuleofLaw "ln RuleofLaw"
label variable sector1 "sector aggregate"
compress
save, replace

*------------------------------------------------------------------------
* Sector-level TFP / labor-productivity file: restrict to the variables listed
* below, arrange them in that order, label, compress and overwrite in place.
use "${data}/aggregates_tfp_lp_klems", clear

local sector_vars ///
    year isocode type sector1 sector data_source ///
    ln_LP_ppp_emp_pwt ln_gdp_pw_ppp ln_gdp_pc_ppp ///
    ln_rgdpe ln_rgdpo ln_rgdpna ln_ctfp ln_rtfpna ///
    ln_hc ln_ck ln_pop ln_RuleofLaw

keep `sector_vars'
order `sector_vars'

label var ln_RuleofLaw "ln RuleofLaw"
label var sector1 "sector aggregate"
label var sector  "sector NAICS2"

compress
save, replace

*------------------------------------------------------------------------
* Sector weights file: attach variable labels, compress and overwrite in place.
use "${data}/sector_weights.dta", clear

* sector identifiers
label var sector  "sector NAICS2"
label var sector1 "sector aggregate"

* expenditure shares
label var theta        "sectoral expenditure shares"
label var theta_fr     "sectoral expenditure shares (France)"
label var theta_mso    "sectoral expenditure shares (within type: manuf, serv, others)"
label var theta_mso_fr "sectoral expenditure shares (within type: manuf, serv, others) (France)"

compress
save, replace
    

*------------------------------------------------------------------------
* Tax ratio file: remove alternative tax measures, label the remaining
* variables, move the country identifiers first, compress and overwrite in place.
use "${data}/taxes_ratio.dta", clear

* The file is overwritten below, so on a second run these variables are
* already gone and a plain drop of the list would stop with r(111). Each name
* is checked (exact match, no abbreviation) before it is dropped.
foreach v in tax_ratio2 ln_tax_ratio2 tax_foreign2 tax_ratio {
    capture confirm variable `v', exact
    if _rc == 0 {
        drop `v'
    }
    else {
        display as text "note: `v' not found in taxes_ratio.dta (already dropped?)"
    }
}

label variable iso2_ctry1 "destination country"
label variable isocode "iso2 (destination country)"
label variable iso2_ctry2 "origin country"
label variable BTT "0/1 Bilateral Tax Treaty"
label variable corp_tax "Corporate Income Tax Rate (Source: OECD)"
label variable dividends_avg "average dividend rate at destination country"
label variable dividends_min "minimum dividend rate at destination country"
label variable dividends_max "maximum dividend rate at destination country"
label variable withholding_avg "average withholding rate at destination country"
label variable withholding_min "minimum withholding rate at destination country"
label variable withholding_max "maximum withholding rate at destination country"
* Spelling of "withholding" corrected in the next two labels.
label variable tax_foreign_noBTT "foreign tax when BTT=0. Assumes min withholding tax"
label variable tax_foreign_BTT "foreign tax when BTT=1. Assumes max withholding tax"
label variable tax_foreign "tax_foreign_noBTT if BTT=0 and tax_foreign_BTT if BTT=1"
label variable ln_tax_ratio "ln of tax_foreign/corp_tax"
order isocode iso2_ctry1 iso2_ctry2
compress
save, replace

 *------------------------------------------------------------------------
* Label the single documented variable of the oalpha file, compress and
* overwrite in place. Label typo "ration" corrected to "ratio".
use "${data}/oalpha_ctte_FR.dta", clear
label variable oalpha_klems "capital to labor ratio (France)"
compress
save, replace

